In [6]:
import numpy as np
import pandas as pd
import seaborn as sns
% matplotlib inline
In [7]:
# Helper to download a file from a URL into a local path.
import os
import urllib.request
import urllib.error

def dl_data(url, output):
    """Download ``url`` and write the response body to ``output``.

    Creates the parent directory of ``output`` if needed.  On a network
    error the failure is printed and the function returns normally
    (best-effort semantics, matching the original design).
    """
    try:
        f = urllib.request.urlopen(url)
        print ("Downloading " + url)

        # Guard: os.makedirs('') raises when output has no directory part.
        out_dir = os.path.dirname(output)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output, "wb") as local_file:
            local_file.write(f.read())

    # BUG FIX: the original caught the bare name ``URLError``, which was never
    # imported — any download failure raised NameError instead of being
    # handled.  The exception must be qualified via urllib.error.
    except urllib.error.URLError:
        print ("Error", url)

Puntos culturales

In [8]:
# Download the Seattle cultural-spaces dataset.
url = "https://data.seattle.gov/api/views/vsxr-aydq/rows.csv?accessType=DOWNLOAD"
output1 = './data/Seattle_Cultural_Space_Inventory.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/vsxr-aydq/rows.csv?accessType=DOWNLOAD
In [9]:
import re

# Pre-compiled patterns for CamelCase -> snake_case conversion:
# _underscorer1 breaks "AbcDef" word boundaries, _underscorer2 breaks
# a lowercase/digit followed by an uppercase letter.
_underscorer1 = re.compile(r'(.)([A-Z][a-z]+)')
_underscorer2 = re.compile('([a-z0-9])([A-Z])')

def camelToSnake(s):
    """Return ``s`` converted from CamelCase to snake_case."""
    with_word_breaks = _underscorer1.sub(r'\1_\2', s)
    fully_broken = _underscorer2.sub(r'\1_\2', with_word_breaks)
    return fully_broken.lower()
In [10]:
# Load the downloaded cultural-spaces CSV and normalise column names to
# snake_case.
# Cleanup: dropped the redundant single-argument os.path.join and the
# needless lambda wrapper around camelToSnake.
dfpois = pd.read_csv('./data/Seattle_Cultural_Space_Inventory.csv')
dfpois.columns = dfpois.columns.map(camelToSnake)
dfpois.head()
Out[10]:
name phone url square _feet _total neighborhood organization _type dominant _discipline year of _occupation rent vs _own age of _current _building ... stability _index (5=very stable, 1=very uncertain) control _index (5=very in control, 1 = very out of control) constituency over 50% one race specific _demographics and _community organization _leadership organization _artists closed _date closed? address location
0 Bulldog News (206) 632-6397 http://www.bulldognews.com/ 500.0 University District N Literary 1985.0 R 1930.0 ... 4.0 2.0 NaN General NaN NaN NaN 0.0 4208 University Way NE Seattle WA 98105 (47.658562, -122.313115)
1 METHOD Gallery (206) 769-1151 http://www.methodgallery.com/ 800.0 Pioneer Square Y Visual 2013.0 R 1907.0 ... 2.0 4.0 NaN no NaN NaN NaN NaN 106 Third Ave S (47.601458, -122.330209)
2 The Makery (206) 954-3497 https://themakeryseattleblog.wordpress.com 500.0 Seward Park N Arts/Cultural Training or Education 2.0 R 1940.0 ... 4.0 3.0 NaN General NaN NaN NaN 0.0 5019 52nd St Seattle WA 98118 (47.556461, -122.268508)
3 SEEDArts Studios (206) 760-4286 http://www.seedseattle.org/seedarts-studios/ 10200.0 Hillman City Y Studios 2014.0 R 1920.0 ... 4.0 3.0 NaN We reflect the diversity of our community. Yes NaN NaN NaN 5617 Rainier Ave S (47.551722, -122.278061)
4 The Royal Room (206) 906-9920 NaN 3000.0 Columbia City N Music 2011.0 R 1917.0 ... 4.0 3.0 NaN general NaN NaN NaN NaN 5000 Rainier Ave S (47.556793, -122.283927)

5 rows × 35 columns

In [11]:
# Row/column count of the raw dataset.
dfpois.shape
Out[11]:
(1196, 35)
In [12]:
# Null counts per column, to gauge how sparse each attribute is.
dfpois.isnull().sum()
Out[12]:
name                                                               0
phone                                                            484
url                                                              484
square _feet _total                                              539
neighborhood                                                     708
organization _type                                               541
dominant _discipline                                               2
year of _occupation                                              530
rent vs _own                                                     525
age of _current _building                                        552
length of _lease (_date)                                         869
 _year _organization _founded                                    531
number of _past _facilities                                      543
stages and _theaters                                             538
stage & _theater _seats                                          733
gallery _space                                                   540
gallery _square _feet                                            759
ada _compliant                                                   557
available _parking                                               594
street _presence                                                 531
rental _space                                                    536
alcohol _sales                                                   528
organization _mission                                           1182
funded by a&c                                                    754
funded by 4_culture                                              972
stability _index (5=very stable, 1=very uncertain)               528
control _index (5=very in control, 1 = very out of control)     1181
constituency over 50% one race                                   984
specific _demographics and _community                           1181
organization _leadership                                        1192
organization _artists                                           1193
closed _date                                                    1167
closed?                                                           16
address                                                           22
location                                                          31
dtype: int64
In [13]:
# Inspect the (still messy) column names produced by camelToSnake.
dfpois.columns
Out[13]:
Index(['name', 'phone', 'url', 'square _feet _total', 'neighborhood',
       'organization _type', 'dominant _discipline', 'year of _occupation',
       'rent vs _own', 'age of _current _building', 'length of _lease (_date)',
       ' _year _organization _founded', 'number of _past _facilities',
       'stages and _theaters', 'stage & _theater _seats ', 'gallery _space',
       'gallery _square _feet', 'ada _compliant', 'available _parking',
       'street _presence', 'rental _space ', 'alcohol _sales',
       'organization _mission', 'funded by a&c', 'funded by 4_culture',
       'stability _index (5=very stable, 1=very uncertain)',
       'control _index (5=very in control, 1 = very out of control) ',
       'constituency over 50% one race',
       'specific _demographics and _community', 'organization _leadership',
       'organization _artists', 'closed _date', 'closed?', 'address',
       'location'],
      dtype='object')
In [14]:
# Rename the columns to clean snake_case identifiers.
# FIX: the original list carried over accidental trailing spaces in three
# names ('stage_&_theater_seats ', 'rental_space ' and the control_index
# column); they are stripped here.  No later cell refers to these columns,
# so the change is safe.
dfpois.columns = ['name', 'phone', 'url', 'square_feet_total', 'neighborhood',
       'organization_type', 'dominant_discipline', 'year_of_occupation',
       'rent_vs_own', 'age_of_current_building', 'length_of_lease(date)',
       'year_organization_founded', 'number_of_past_facilities',
       'stages_and_theaters', 'stage_&_theater_seats', 'gallery_space',
       'gallery_square_feet', 'ada_compliant', 'available_parking',
       'street_presence', 'rental_space', 'alcohol_sales',
       'organization_mission', 'funded_by_a&c', 'funded_by_4_culture',
       'stability_index(5=very_stable,1=very_uncertain)',
       'control_index(5=very_in_control, 1=very_out_of_control)',
       'constituency_over_50%_one_race',
       'specific_demographics_and_community', 'organization_leadership',
       'organization_artists', 'closed_date', 'closed?', 'address',
       'location']
In [15]:
# Category frequencies.  Note the near-duplicate values that differ only by
# trailing spaces or a typo ('Arts/Cultrual ...') in the raw data.
dfpois.dominant_discipline.value_counts()
Out[15]:
Performance                                 174
Visual                                      173
Arts/Cultural Training or Education         141
Multi-use                                   133
Music                                       132
Service/Supply                               94
Literary                                     77
Heritage                                     58
Community Center                             51
Studios                                      50
Arts/Cultural Administration or Advocacy     39
Cinema                                       25
Preservation                                 18
Education                                    14
Live/Work                                    12
Multi-use                                     1
Arts/Cultural Training or Education           1
Arts/Cultrual Administration or Advocacy      1
Name: dominant_discipline, dtype: int64
In [16]:
# Keep only open venues (closed? == 0).
# Then drop categories considered too common or of minor interest.
# NOTE: 'Arts/Cultrual Administration or Advocacy' is intentionally
# misspelled — it matches a typo present in the source data.
# NOTE(review): value_counts above also shows a trailing-space variant of
# 'Arts/Cultural Training or Education' (1 record) that the original filter
# did not remove; kept as-is to preserve the published result (656 rows).
excluded_disciplines = [
    'Service/Supply',
    'Arts/Cultural Training or Education',
    'Arts/Cultrual Administration or Advocacy',
    'Multi-use',
    'Multi-use ',   # trailing-space variant present in the raw data
    'Preservation',
    'Community Center',
    'Heritage',
]
dfpois = dfpois.loc[dfpois['closed?'] == 0.0]
dfpois = dfpois.loc[~dfpois.dominant_discipline.isin(excluded_disciplines)]
# Also drop rows with a missing dominant_discipline.
dfpois = dfpois.loc[dfpois.dominant_discipline.notnull()]
dfpois.shape
# 1196 - 656 = 540 records removed (the original comment's arithmetic,
# "1196 - 911 = 285", did not match the actual output).
Out[16]:
(656, 35)
In [17]:
# Keep only the location column and split it into latitude/longitude.
# NOTE(review): this overwrites dfpois, discarding every other attribute —
# a new variable name would preserve the cleaned frame for later use.
dfpois = dfpois.location.str.strip('()')   \
                   .str.split(', ', expand=True)   \
                   .rename(columns={0:'latitude', 1:'longitude'})
dfpois = dfpois.dropna()
dfpois = dfpois.astype(float)
dfpois.head()
Out[17]:
latitude longitude
0 47.658562 -122.313115
13 47.605016 -122.329573
16 47.601435 -122.330062
17 47.623673 -122.359095
18 47.602850 -122.333868
In [18]:
# Great-circle distance between two points, accounting for the Earth's
# curvature (unlike a plain Euclidean distance).
from math import radians, cos, sin, asin, sqrt, acos

def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle (haversine) distance in kilometres between
    two points given as decimal-degree (longitude, latitude) pairs."""
    earth_radius_km = 6373
    lon1_r, lat1_r, lon2_r, lat2_r = (radians(v) for v in (lon1, lat1, lon2, lat2))
    half_dlat = (lat2_r - lat1_r) / 2
    half_dlon = (lon2_r - lon1_r) / 2
    # Haversine formula: chord is sin^2 of half the central angle.
    chord = sin(half_dlat)**2 + cos(lat1_r) * cos(lat2_r) * sin(half_dlon)**2
    central_angle = 2 * asin(sqrt(chord))
    return central_angle * earth_radius_km
In [19]:
# Parking-meter element keys with their coordinates (prepared earlier).
coord = pd.read_csv('./data/Coord_EK.csv')
coord.head()
Out[19]:
element_key latitude longitude
0 1001 47.602862 -122.334703
1 1002 47.602997 -122.334538
2 1005 47.603602 -122.335382
3 1006 47.603725 -122.335171
4 1009 47.605010 -122.336669

Representamos un caso para observar la distancia de proximidad

In [20]:
# Take the first POI as a single-row frame to illustrate the proximity bands.
sub = dfpois.loc[0,:].astype(float).to_frame().transpose()
In [21]:
# Add columns to a copy of coord flagging whether the sample POI lies within
# a given distance band of each element_key.
# The elif chain makes the bands mutually exclusive, and once an element_key
# has a nearby point (<= 75-200 m) no further points need checking (break).
prueba_coord = coord.copy()
prueba_coord['poi_75m'] = 0
prueba_coord['poi_100m'] = 0
prueba_coord['poi_150m'] = 0
prueba_coord['poi_200m'] = 0
for c_index, c_row in prueba_coord.iterrows():
    for df_index, df_row in sub.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= 0.075: # 75 metres
            prueba_coord.at[c_index,'poi_75m'] = 1
            break
        elif dist <= 0.1:
            prueba_coord.at[c_index,'poi_100m'] = 1
            break
        elif dist <= 0.15:
            prueba_coord.at[c_index,'poi_150m'] = 1
            break
        elif dist <= 0.2:
            prueba_coord.at[c_index,'poi_200m'] = 1
            break
In [22]:
import folium

this_map = folium.Map(prefer_canvas=True, max_bounds=False)

def plotDot(df):
    """Add one circle marker per row (expects .latitude / .longitude
    attributes) to the module-level ``this_map``.

    The fill colour is read from the module-level ``usedColor`` variable,
    set immediately before each ``.apply`` call below.
    """
    df = df.copy()
    folium.CircleMarker(location=[df.latitude, df.longitude],
                        radius=5,
                        fill=True,
                        fill_opacity=0.8,
                        fill_color=usedColor,
                        color='whitesmoke',
                        weight=0.5).add_to(this_map)

# Paint the distance bands from farthest (purple, <= 200 m) to nearest
# (turquoise, <= 75 m), then the POI itself in red on top.
usedColor = 'purple'
prueba_coord.loc[prueba_coord.poi_200m == 1].apply(plotDot, axis = 1)
usedColor = 'blue'
prueba_coord.loc[prueba_coord.poi_150m == 1].apply(plotDot, axis = 1)
usedColor = 'green'
prueba_coord.loc[prueba_coord.poi_100m == 1].apply(plotDot, axis = 1)
usedColor = 'turquoise'
prueba_coord.loc[prueba_coord.poi_75m == 1].apply(plotDot, axis = 1)
usedColor = 'red'
sub.apply(plotDot, axis = 1)
    
# Fit the viewport to the plotted markers.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[22]:

El punto de interés aparece señalado en color rojo. Los parquímetros que están a una distancia inferior o igual a 75 metros son los de color turquesa. Los parquímetros que están a una distancia entre 75 y 100 metros se incluyen en color verde. Los parquímetros que están a una distancia entre 100 y 150 metros son los de color azul y los parquímetros que están a una distancia entre 150 y 200 metros se incluyen en color morado.

In [23]:
# Proximity radius in kilometres (75 m) used for all the flags below.
radio_dist_prox = 0.075
In [24]:
# Flag, for every parking meter, whether at least one cultural POI lies
# within the proximity radius.  `any` short-circuits on the first hit,
# exactly like the original inner `break`.
coord['poi'] = 0
for meter_idx, meter in coord.iterrows():
    near_poi = any(
        haversine(meter.longitude, meter.latitude, place.longitude, place.latitude) <= radio_dist_prox
        for _, place in dfpois.iterrows()
    )
    if near_poi:
        coord.at[meter_idx, 'poi'] = 1
In [25]:
# Percentage of parking meters with a cultural POI nearby.
coord.element_key.loc[coord.poi == 1].count() / coord.shape[0] * 100
Out[25]:
32.95978905735003
In [26]:
# Map: all meters (blue), cultural POIs (green) and meters flagged as near
# a POI (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)

usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
dfpois.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.poi == 1].apply(plotDot, axis = 1)
    
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[26]:

Baseball

In [27]:
# The shapely package must be installed beforehand, e.g.:
# conda install -c scitools/label/archive shapely
from shapely.geometry import Point
from shapely.geometry.multipolygon import MultiPolygon
from shapely import wkt
from shapely.wkt import loads
In [28]:
# Download the baseball-fields dataset.
url = "https://data.seattle.gov/api/views/6v75-vrvs/rows.csv?accessType=DOWNLOAD"
output1 = './data/Baseball_Field.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/6v75-vrvs/rows.csv?accessType=DOWNLOAD
In [29]:
# Load the baseball-fields CSV.
df_base = pd.read_csv(os.path.join('./data/Baseball_Field.csv'))
In [30]:
df_base.head()
Out[30]:
PMAID the_geom RESERVED1 GIS_AREA GIS_LENGTH GIS_EDT_DT BALLFIELD_ NAME SPORT_TYPE FIELD_SURF ... FACILITY_N RESERVED2 RESERVED3 NOTES LINK_1 LINK_2 LINK_3 AMWOID SHAPE_AREA SHAPE_LEN
0 422 MULTIPOLYGON (((-122.27259129399673 47.5260192... NaN 77303.320339 1135.795831 10/21/2014 12:00:00 AM +0000 801 Rainier Beach Baseball Grass ... Ballfield 01 NaN NaN NaN NaN NaN NaN ATHLFLD-RBPF 0 0
1 391 MULTIPOLYGON (((-122.3019285062559 47.66868012... NaN 37512.326072 749.471305 10/21/2014 12:00:00 AM +0000 801 Ravenna Baseball Grass ... Ballfield 01 NaN NaN NaN NaN NaN NaN ATHLFLD-RAVPK 0 0
2 400 MULTIPOLYGON (((-122.31492060232524 47.5861955... NaN 42033.954263 813.444406 10/21/2014 12:00:00 AM +0000 801 Beacon Hill Baseball Grass ... Ballfield NaN NaN NaN NaN NaN NaN ATHLFLD-BHPG 0 0
3 292 MULTIPOLYGON (((-122.34217620405622 47.6687435... NaN 63942.730131 1038.732456 10/21/2014 12:00:00 AM +0000 801 Lower Woodland Baseball Grass ... Ballfield 06 NaN NaN NaN NaN NaN NaN ATHLFLD-WLPK-CLVR-6 0 0
4 361 MULTIPOLYGON (((-122.32555601354544 47.7200755... NaN 38644.145115 797.354520 10/21/2014 12:00:00 AM +0000 801 Northacres Baseball Grass ... Ballfield 01 NaN NaN NaN NaN NaN NaN ATHLFLD-NACPK 0 0

5 rows × 25 columns

In [31]:
# Keep only the geometry column; raise the column-width display limit so the
# full MULTIPOLYGON WKT string is visible in the next output.
# FIX: use the canonical 'display.max_colwidth' option key — the bare
# 'max_colwidth' shorthand is ambiguous and deprecated in newer pandas.
df_base = df_base[['the_geom']]
pd.set_option('display.max_colwidth', 100000)
df_base.head(1)
Out[31]:
the_geom
0 MULTIPOLYGON (((-122.27259129399673 47.52601927298346, -122.27257752370237 47.52600642939875, -122.27254548267092 47.52600611204517, -122.27254546201019 47.52599088666039, -122.27176661993877 47.52599288660424, -122.27176653385249 47.52591907248638, -122.27177646708512 47.52584556561534, -122.27179633545914 47.52577298860192, -122.27182597064699 47.52570195617837, -122.27186512159835 47.52563306999163, -122.27191345666779 47.52556691350763, -122.27197056642476 47.52550404706956, -122.27203596712256 47.52544500315207, -122.27210910479634 47.52539028185153, -122.27218935995565 47.52534034665064, -122.27227605283169 47.52529562049316, -122.27236844913506 47.52525648220201, -122.27246576627483 47.52522326327102, -122.27256717998655 47.5251962450576, -122.27267183131303 47.52517565639998, -122.27277883387889 47.52516167167922, -122.27288728139688 47.52515440934254, -122.27299625534309 47.525153930900096, -122.27300057278396 47.52569286862945, -122.27300069339235 47.52568733009272, -122.27302109392775 47.52568714892568, -122.27302085799529 47.525708994986005, -122.27304145673172 47.52571906578899, -122.27304223415413 47.525759181479884, -122.2730613341649 47.52575990288884, -122.27305731448998 47.525890125253575, -122.27302351243998 47.52591404205778, -122.27302011466 47.52597648553648, -122.27297175637065 47.52601118492268, -122.2728795108592 47.52601194274166, -122.27284378946743 47.52603398386657, -122.27265239531906 47.52603374430984, -122.27260079738778 47.526033767405906, -122.27260052053602 47.526019509632555, -122.27259129399673 47.52601927298346)))
In [32]:
# Number of baseball fields.
df_base.shape
Out[32]:
(122, 1)
In [33]:
# The MULTIPOLYGON structure holds several polygons, each with a ring of
# exterior points.  For each element_key we flag whether at least one
# exterior point lies within the proximity radius.
# FIX 1: the original re-parsed every WKT string and re-appended every
# exterior point to mp_lat/mp_lon once per parking meter, duplicating the
# point lists len(coord) times; geometries are now parsed a single time.
# FIX 2: the original `break` only exited the innermost point loop, so the
# scan kept running over the remaining polygons and rows after a match;
# the search now stops as soon as a meter is flagged.
# FIX 3: `.geoms` replaces `list(multi)`, which was removed in shapely 2.
# The resulting 'baseball' flags and the de-duplicated point set (built in
# a later cell) are identical to the original's.
coord['baseball'] = 0
mp_lat = []
mp_lon = []
for index, row in df_base.iterrows():
    multi = loads(row['the_geom'])
    for polygon in multi.geoms:
        # exterior.coords yields (lon, lat) tuples
        for lonp, latp in polygon.exterior.coords:
            mp_lat.append(float(latp))
            mp_lon.append(float(lonp))
for c_index, c_row in coord.iterrows():
    for latp, lonp in zip(mp_lat, mp_lon):
        if haversine(c_row.longitude, c_row.latitude, lonp, latp) <= radio_dist_prox:
            coord.at[c_index,'baseball'] = 1
            break
In [34]:
# Percentage of meters near a baseball field.
coord.element_key.loc[coord.baseball == 1].count() / coord.shape[0] * 100
Out[34]:
0.8569545154911009
In [35]:
# Build a de-duplicated frame of all polygon exterior points, for plotting.
mp = pd.DataFrame([mp_lat, mp_lon]).astype(float).transpose()
mp.columns = ['latitude', 'longitude']
mp = mp.drop_duplicates()
mp.shape
Out[35]:
(5032, 2)
In [36]:
# Map: all meters (blue), field outline points (green) and meters flagged
# as near a baseball field (orange).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)

usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
mp.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.baseball == 1].apply(plotDot, axis = 1)
    
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[36]:

Tenis

Repetimos el mismo proceso para pistas de tenis

In [37]:
# Download the tennis-courts dataset.
url = "https://data.seattle.gov/api/views/p8fp-a7qi/rows.csv?accessType=DOWNLOAD"
output1 = './data/Tennis_Court_Point.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/p8fp-a7qi/rows.csv?accessType=DOWNLOAD
In [38]:
# Load the tennis-courts CSV.
df_tenis = pd.read_csv(os.path.join('./data/Tennis_Court_Point.csv'))
df_tenis.head()
Out[38]:
PMAID the_geom GIS_AREA GIS_LENGTH GIS_EDT_DT SPORTCOURT NAME SPORT_TYPE COURT_ID FACILITY_I FACILITY_N COURT_SIZE RESERVED_1 RESERVED_2 RESERVED_3 LOCATION_I ORIG_FID
0 322 POINT (-122.35535254024099 47.63126282170146) 7008.017763 353.107404 04/01/2015 12:00:00 AM +0000 0 NaN Tennis NaN NaN NaN NaN NaN NaN NaN NaN 6
1 322 POINT (-122.35505634945147 47.63158818432751) 5919.709996 334.916999 04/01/2015 12:00:00 AM +0000 0 NaN Tennis NaN NaN NaN NaN NaN NaN NaN NaN 7
2 488 POINT (-122.30446228709225 47.67656332710144) 1440.812667 151.848564 04/01/2015 12:00:00 AM +0000 0 NaN Tennis NaN NaN NaN NaN NaN NaN NaN NaN 12
3 292 POINT (-122.34340864856209 47.669367127426725) 6562.129583 350.175016 04/01/2015 12:00:00 AM +0000 0 NaN Tennis NaN NaN NaN NaN NaN NaN NaN NaN 110
4 292 POINT (-122.3431911505528 47.669035558387705) 6527.883015 349.449458 04/01/2015 12:00:00 AM +0000 0 NaN Tennis NaN NaN NaN NaN NaN NaN NaN NaN 114
In [39]:
# Parse "POINT (lon lat)" WKT strings into numeric longitude/latitude columns.
# str.strip removes the characters 'P','O','I','N','T',' ','(',')' from both
# ends, leaving just "lon lat".
df_tenis = df_tenis.the_geom.str.strip('POINT ()')   \
                   .str.split(' ', expand=True)   \
                   .rename(columns={0:'longitude', 1:'latitude'})
df_tenis = df_tenis.astype(float)
df_tenis.head()
Out[39]:
longitude latitude
0 -122.355353 47.631263
1 -122.355056 47.631588
2 -122.304462 47.676563
3 -122.343409 47.669367
4 -122.343191 47.669036
In [40]:
# Flag meters within the proximity radius of any tennis court.
# NOTE(review): this loop pattern repeats for every facility type below —
# a shared helper function would remove the duplication.
coord['tennis'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_tenis.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= radio_dist_prox:
            coord.at[c_index,'tennis'] = 1
            break
In [41]:
# Percentage of meters near a tennis court.
coord.element_key.loc[coord.tennis == 1].count() / coord.shape[0] * 100
Out[41]:
0.1977587343441002
In [42]:
# Map: all meters (blue), tennis courts (green), flagged meters (orange).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)

usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_tenis.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.tennis == 1].apply(plotDot, axis = 1)
    
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[42]:
In [43]:
# Inspect the meters flagged as near a tennis court.
coord.loc[coord.tennis == 1]
Out[43]:
element_key latitude longitude poi baseball tennis
567 34569 47.615938 -122.320117 0 1 1
700 37185 47.615347 -122.320143 0 1 1
854 54730 47.615945 -122.320692 0 1 1

Piscinas

In [44]:
# Download the swimming-pools dataset.
url = "https://data.seattle.gov/api/views/ppq2-qxkx/rows.csv?accessType=DOWNLOAD"
output1 = './data/Swimming_Pools.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/ppq2-qxkx/rows.csv?accessType=DOWNLOAD
In [45]:
# Load the swimming-pools CSV.
df_sp = pd.read_csv(os.path.join('./data/Swimming_Pools.csv'))
df_sp.head()
Out[45]:
the_geom COORDINATO ADDRESS NAME PHONE OFFICIAL_N INDOOR_OUT FULL_NAME POINT_X POINT_Y GIS_EDT_DT LATITUDE LONGITUDE PMAID LOCID AMWO_ID RES1 RES2
0 POINT (-122.35795026785668 47.63626286559663) Janet Wilson 1920 1st Ave West Queen Anne Pool 386-4282 Queen Anne Pool Indoor Queen Anne Pool 1.264556e+06 235812.484537 11/30/1899 12:00:00 AM +0000 47.636263 -122.357950 337 NaN NaN NaN NaN
1 POINT (-122.30240182803597 47.606887600677034) Kristen Schuler 500 23rd Ave Evers Pool 684-4766 Evers Memorial Pool Indoor Medgar Evers Pool 1.278044e+06 224832.999934 11/30/1899 12:00:00 AM +0000 47.606888 -122.302402 353 NaN NaN NaN NaN
2 POINT (-122.27033751275907 47.524766415040474) Donna Sammons 8825 Rainier Ave S Rainier Beach Pool 386-1944 Rainier Beach Pool Indoor Rainier Beach Pool 1.285393e+06 194733.953177 11/30/1899 12:00:00 AM +0000 47.524766 -122.270338 434 NaN NaN NaN NaN
3 POINT (-122.36916224498255 47.52800132362959) Nancy Eisner 2801 SW Thistle St Southwest Pool 233-7295 Southwest Pool Indoor Southwest Pool 1.261005e+06 196385.281239 11/30/1899 12:00:00 AM +0000 47.528001 -122.369162 3996 NaN NaN NaN NaN
4 POINT (-122.376161943172 47.677540264436196) Angela Eddy 1471 NW 67th Street Ballard Pool 684-4094 Ballard Pool Indoor Captain William R. Ballard Pool 1.260369e+06 250955.515603 11/30/1899 12:00:00 AM +0000 47.677540 -122.376162 498 NaN NaN NaN NaN
In [46]:
# Keep only the coordinate columns, renamed to the common convention.
df_sp = df_sp[['LATITUDE','LONGITUDE']]
df_sp.columns = ['latitude', 'longitude']
In [47]:
# Flag meters within the proximity radius of any swimming pool.
coord['swim_pool'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_sp.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= radio_dist_prox:
            coord.at[c_index,'swim_pool'] = 1
            break
In [48]:
# Percentage of meters near a swimming pool (turns out to be zero).
coord.element_key.loc[coord.swim_pool == 1].count() / coord.shape[0] * 100
Out[48]:
0.0
In [49]:
# No meter is near a swimming pool, so drop the all-zero column.
coord = coord.drop(columns=['swim_pool'])

Baloncesto

In [50]:
# Download the basketball-courts dataset.
url = "https://data.seattle.gov/api/views/dxss-26kb/rows.csv?accessType=DOWNLOAD"
output1 = './data/Basketball_Court_Point.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/dxss-26kb/rows.csv?accessType=DOWNLOAD
In [51]:
# Load the basketball-courts CSV.
df_bb = pd.read_csv(os.path.join('./data/Basketball_Court_Point.csv'))
df_bb.head()
Out[51]:
GIS_LENGTH GIS_AREA PMAID the_geom GIS_EDT_DT SPORTCOURT NAME SPORT_TYPE COURT_ID FACILITY_I FACILITY_N COURT_SIZE RESERVED_1 RESERVED_2 RESERVED_3 LOCATION_I ORIG_FID
0 327.852253 6237.155112 114 POINT (-122.30849123459399 47.56944067743952) 04/01/2015 12:00:00 AM +0000 0 NaN Basketball NaN NaN NaN Full NaN NaN NaN NaN 17
1 321.136129 6197.019558 390 POINT (-122.30789666684107 47.600349509910195) 04/01/2015 12:00:00 AM +0000 0 NaN Basketball NaN NaN NaN Full NaN NaN NaN NaN 18
2 182.821568 1822.460392 382 POINT (-122.31416450879583 47.71649703913903) 04/01/2015 12:00:00 AM +0000 0 NaN Basketball NaN NaN NaN Half NaN NaN NaN NaN 19
3 238.815565 3452.310432 450 POINT (-122.36327480720777 47.56132920631473) 04/01/2015 12:00:00 AM +0000 0 NaN Basketball NaN NaN NaN Full NaN NaN NaN NaN 20
4 357.406488 7142.272128 458 POINT (-122.36979006956886 47.53334748473792) 04/01/2015 12:00:00 AM +0000 0 NaN Basketball NaN NaN NaN Full NaN NaN NaN NaN 21
In [52]:
# Parse "POINT (lon lat)" WKT strings into numeric longitude/latitude columns.
df_bb = df_bb.the_geom.str.strip('POINT ()')   \
                   .str.split(' ', expand=True)   \
                   .rename(columns={0:'longitude', 1:'latitude'})
df_bb = df_bb.astype(float)
df_bb.head()
Out[52]:
longitude latitude
0 -122.308491 47.569441
1 -122.307897 47.600350
2 -122.314165 47.716497
3 -122.363275 47.561329
4 -122.369790 47.533347
In [53]:
# Flag meters within the proximity radius of any basketball court.
coord['basket'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_bb.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= radio_dist_prox:
            coord.at[c_index,'basket'] = 1
            break
In [54]:
# Percentage of meters near a basketball court.
coord.element_key.loc[coord.basket == 1].count() / coord.shape[0] * 100
Out[54]:
0.3295978905735003
In [55]:
# Map: all meters (blue), basketball courts (green), flagged meters (orange).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)

usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_bb.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.basket == 1].apply(plotDot, axis = 1)
    
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[55]:

Soccer

In [56]:
# Download the soccer-fields dataset.
url = "https://data.seattle.gov/api/views/vre6-ceji/rows.csv?accessType=DOWNLOAD"
output1 = './data/Soccer_Field.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/vre6-ceji/rows.csv?accessType=DOWNLOAD
In [57]:
# Load the soccer-fields CSV.
df_soc = pd.read_csv(os.path.join('./data/Soccer_Field.csv'))
df_soc.head()
Out[57]:
NAME the_geom ADDRESS DIVISION SOCCER OVERLAPPIN E_SURFACE E_LIGHTS PMAID LOCID AMWO_ID RES1 RES2
0 Decatur EL POINT (-122.28435384474307 47.68561364506833) 7711 43rd Ave NE SSD 2 Y Grass No NaN NaN NaN NaN NaN
1 East Queen Anne Playground POINT (-122.35349681556443 47.636140546290584) 1912 Warren Ave N Central 1 N Grass No 329.0 NaN NaN NaN NaN
2 Pinehurst Playground POINT (-122.31463878544118 47.71625503063297) 12029 14th Ave NE North 1 N Grass No 382.0 NaN NaN NaN NaN
3 Ravenna-Eckstein Park POINT (-122.30540806725641 47.67716865127399) NaN North 1 N Grass No 488.0 NaN NaN NaN NaN
4 Green Lake Park POINT (-122.32766609679587 47.67947645167757) 7201 E Green Lake Way North 1 Y Grass No 307.0 NaN NaN NaN NaN
In [58]:
# Parse "POINT (lon lat)" WKT strings into numeric longitude/latitude columns.
df_soc = df_soc.the_geom.str.strip('POINT ()')   \
                   .str.split(' ', expand=True)   \
                   .rename(columns={0:'longitude', 1:'latitude'})
df_soc = df_soc.astype(float)
df_soc.head()
Out[58]:
longitude latitude
0 -122.284354 47.685614
1 -122.353497 47.636141
2 -122.314639 47.716255
3 -122.305408 47.677169
4 -122.327666 47.679476
In [59]:
# Flag meters within the proximity radius of any soccer field.
coord['soccer'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_soc.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= radio_dist_prox:
            coord.at[c_index,'soccer'] = 1
            break
In [60]:
# Percentage of meters near a soccer field.
coord.element_key.loc[coord.soccer == 1].count() / coord.shape[0] * 100
Out[60]:
0.26367831245880025
In [61]:
# Map: all meters (blue), soccer fields (green), flagged meters (orange).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)

usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_soc.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.soccer == 1].apply(plotDot, axis = 1)
    
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Out[61]:

Atletismo

In [62]:
# Download the track-fields (athletics) dataset.
url = "https://data.seattle.gov/api/views/y5mt-y5i8/rows.csv?accessType=DOWNLOAD"
output1 = './data/Track_Fields.csv'
dl_data(url,output1)
Downloading https://data.seattle.gov/api/views/y5mt-y5i8/rows.csv?accessType=DOWNLOAD
In [63]:
# Load the track-fields CSV.
df_ath = pd.read_csv(os.path.join('./data/Track_Fields.csv'))
df_ath.head()
Out[63]:
ADDRESS NAME the_geom DIVISION TRACK OVERLAPPIN E_SURFACE E_LIGHTS PMAID LOCID AMWO_ID RES1 RES2
0 3013 S Mt Baker Blvd Franklin HS POINT (-122.29541428116494 47.576761033398434) SSD 1 Y Synthetic No NaN NaN NaN NaN NaN
1 11051 34th Ave NE Jane Addams POINT (-122.29345154595929 47.70896175799827) SSD 1 Y Synthetic Yes NaN NaN NaN NaN NaN
2 550 Phiney Ave N Woodland Park - Field 7 POINT (-122.3416118198449 47.66970572000668) North 1 N Synthetic Yes 292.0 NaN NaN NaN NaN
3 5511 15th Ave S Cleveland Playfield POINT (-122.31558010418237 47.5520865432037) South 1 N Grass No 404.0 NaN NaN NaN NaN
4 4432 35th Ave Sw West Seattle Stadium POINT (-122.37415252022458 47.56307498209784) South 1 N Grass Yes 472.0 NaN NaN NaN NaN
In [64]:
# Parse "POINT (lon lat)" WKT strings into numeric longitude/latitude columns.
df_ath = df_ath.the_geom.str.strip('POINT ()')   \
                   .str.split(' ', expand=True)   \
                   .rename(columns={0:'longitude', 1:'latitude'})
df_ath = df_ath.astype(float)
df_ath.head()
Out[64]:
longitude latitude
0 -122.295414 47.576761
1 -122.293452 47.708962
2 -122.341612 47.669706
3 -122.315580 47.552087
4 -122.374153 47.563075
In [65]:
# Flag meters within 250 m of any track field.
# NOTE(review): this cell hard-codes 0.25 instead of using radio_dist_prox
# (0.075) like every other facility loop — presumably a deliberately larger
# radius for stadium-sized venues, but confirm; it is undocumented.
coord['athletism'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_ath.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        if dist <= 0.25:
            coord.at[c_index,'athletism'] = 1
            break
In [66]:
# Percentage of meters near a track field (turns out to be zero).
coord.element_key.loc[coord.athletism == 1].count() / coord.shape[0] * 100
Out[66]:
0.0
In [67]:
# No meter is near a track field, so drop the all-zero column.
coord = coord.drop(columns=['athletism'])
In [68]:
# Preview the final feature set: one proximity flag per facility type.
coord.head()
Out[68]:
element_key latitude longitude poi baseball tennis basket soccer
0 1001 47.602862 -122.334703 1 0 0 0 0
1 1002 47.602997 -122.334538 1 0 0 0 0
2 1005 47.603602 -122.335382 0 0 0 0 0
3 1006 47.603725 -122.335171 0 0 0 0 0
4 1009 47.605010 -122.336669 1 0 0 0 0
In [69]:
# Persist the enriched meter table (cultural + sport proximity flags).
coord.to_csv('./data/Coord_cult_&_sport.csv', index=False)